home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Disc to the Future 2
/
Disc to the Future Part II Programmer's Reference (Wayzata Technology)(6013)(1992).bin
/
MAC
/
MPW_TOOL
/
TOOLS
/
TOOLS_WI
/
ICON_8
/
BENCH_FO
/
RSG.ICN
< prev
next >
Wrap
Text File
|
1990-04-06
|
11KB
|
386 lines
############################################################################
#
# Name: rsg.icn
#
# Title: Generate randomly selected sentences from a grammar
#
# Author: Ralph E. Griswold
#
# Date: June 10, 1988
#
############################################################################
#
# This program generates randomly selected strings (``sen-
# tences'') from a grammar specified by the user. Grammars are
# basically context-free and resemble BNF in form, although there
# are a number of extensions.
#
# The program works interactively, allowing the user to build,
# test, modify, and save grammars. Input to rsg consists of various
# kinds of specifications, which can be intermixed:
#
# Productions define nonterminal symbols in a syntax similar to
# the rewriting rules of BNF with various alternatives consisting
# of the concatenation of nonterminal and terminal symbols. Gen-
# eration specifications cause the generation of a specified number
# of sentences from the language defined by a given nonterminal
# symbol. Grammar output specifications cause the definition of a
# specified nonterminal or the entire current grammar to be written
# to a given file. Source specifications cause subsequent input to
# be read from a specified file.
#
# In addition, any line beginning with # is considered to be a
# comment, while any line beginning with = causes the rest of that
# line to be used subsequently as a prompt to the user whenever rsg
# is ready for input (there normally is no prompt). A line consist-
# ing of a single = stops prompting.
#
# Productions: Examples of productions are:
#
# <expr>::=<term>|<term>+<expr>
# <term>::=<elem>|<elem>*<term>
# <elem>::=x|y|z|(<expr>)
#
# Productions may occur in any order. The definition for a nonter-
# minal symbol can be changed by specifying a new production for
# it.
#
# There are a number of special devices to facilitate the defin-
# ition of grammars, including eight predefined, built-in nontermi-
# nal symbols:
# symbol definition
# <lb> <
# <rb> >
# <vb> |
# <nl> newline
# <> empty string
# <&lcase> any single lowercase letter
# <&ucase> any single uppercase letter
# <&digit> any single digit
#
# In addition, if the string between a < and a > begins and ends
# with a single quotation mark, it stands for any single character
# between the quotation marks. For example,
#
# <'xyz'>
#
# is equivalent to
#
# x|y|z
#
# Generation Specifications: A generation specification consists of
# a nonterminal symbol followed by a nonnegative integer. An exam-
# ple is
#
# <expr>10
#
# which specifies the generation of 10 <expr>s. If the integer is
# omitted, it is assumed to be 1. Generated sentences are written
# to standard output.
#
# Grammar Output Specifications: A grammar output specification
# consists of a nonterminal symbol, followed by ->, followed by a
# file name. Such a specification causes the current definition of
# the nonterminal symbol to be written to the given file. If the
# file is omitted, standard output is assumed. If the nonterminal
# symbol is omitted, the entire grammar is written out. Thus,
#
# ->
#
# causes the entire grammar to be written to standard output.
#
# Source Specifications: A source specification consists of @ fol-
# lowed by a file name. Subsequent input is read from that file.
# When an end of file is encountered, input reverts to the previous
# file. Input files can be nested.
#
# Options: The following options are available:
#
# -s n Set the seed for random generation to n. The default
# seed is 0.
#
# -l n Terminate generation if the number of symbols remaining
# to be processed exceeds n. The default is limit is 1000.
#
# -t Trace the generation of sentences. Trace output goes to
# standard error output.
#
# Diagnostics: Syntactically erroneous input lines are noted but
# are otherwise ignored. Specifications for a file that cannot be
# opened are noted and treated as erroneous.
#
# If an undefined nonterminal symbol is encountered during gen-
# eration, an error message that identifies the undefined symbol is
# produced, followed by the partial sentence generated to that
# point. Exceeding the limit of symbols remaining to be generated
# as specified by the -l option is handled similarly.
#
# Caveats: Generation may fail to terminate because of a loop in
# the rewriting rules or, more seriously, because of the progres-
# sive accumulation of nonterminal symbols. The latter problem can
# be identified by using the -t option and controlled by using the
# -l option. The problem often can be circumvented by duplicating
# alternatives that lead to fewer rather than more nonterminal sym-
# bols. For example, changing
#
# <term>::=<elem>|<elem>*<term>
#
# to
#
# <term>::=<elem>|<elem>|<elem>*<term>
#
# increases the probability of selecting <elem> from 1/2 to 2/3.
#
# There are many possible extensions to the program. One of the
# most useful would be a way to specify the probability of select-
# ing an alternative.
#
############################################################################
#
# Links: options, post
#
############################################################################
link options, post
global defs, ifile, in, limit, prompt, tswitch
record nonterm(name)
record charset(chars)
procedure main(args)
local line, plist, s, opts
Init__()
# procedures to try on input lines
plist := [define,generate,grammar,source,comment,prompter,error]
defs := table() # table of definitions
defs["lb"] := [["<"]] # built-in definitions
defs["rb"] := [[">"]]
defs["vb"] := [["|"]]
defs["nl"] := [["\n"]]
defs[""] := [[""]]
defs["&lcase"] := [[charset(&lcase)]]
defs["&ucase"] := [[charset(&ucase)]]
defs["&digit"] := [[charset(&digits)]]
opts := options(args,"tl+s+")
limit := \opts["l"] | 1000
tswitch := \opts["t"]
&random := \opts["s"]
ifile := [&input] # stack of input files
prompt := ""
while in := pop(ifile) do { # process all files
repeat {
if *prompt ~= 0 then writes(prompt)
line := read(in) | break
while line[-1] == "\\" do line := line[1:-1] || read(in) | break
(!plist)(line)
}
close(in)
}
Term__()
end
# process alternatives
#
procedure alts(defn)
local alist
alist := []
defn ? while put(alist,syms(tab(upto('|') | 0))) do move(1) | break
return alist
end
# look for comment
#
procedure comment(line)
if line[1] == "#" then return
end
# look for definition
#
procedure define(line)
return line ?
defs[(="<",tab(find(">::=")))] := (move(4),alts(tab(0)))
end
# define nonterminal
#
procedure defnon(sym)
local chars, name
if sym ? {
="'" &
chars := cset(tab(-1)) &
="'"
}
then return charset(chars)
else return nonterm(sym)
end
# note erroneous input line
#
procedure error(line)
write("*** erroneous line: ",line)
return
end
# generate sentences
#
procedure gener(goal)
local pending, symbol
pending := [nonterm(goal)]
while symbol := get(pending) do {
if \tswitch then
write(&errout,symimage(symbol),listimage(pending))
case type(symbol) of {
"string": writes(symbol)
"charset": writes(?symbol.chars)
"nonterm": {
pending := ?\defs[symbol.name] ||| pending | {
write(&errout,"*** undefined nonterminal: <",symbol.name,">")
break
}
if *pending > \limit then {
write(&errout,"*** excessive symbols remaining")
break
}
}
}
}
write()
end
# look for generation specification
#
procedure generate(line)
local goal, count
if line ? {
="<" &
goal := tab(upto('>')) \ 1 &
move(1) &
count := (pos(0) & 1) | integer(tab(0))
}
then {
every 1 to count do
gener(goal)
return
}
else fail
end
# get right hand side of production
#
procedure getrhs(a)
local rhs
rhs := ""
every rhs ||:= listimage(!a) || "|"
return rhs[1:-1]
end
# look for request to write out grammar
#
procedure grammar(line)
local file, out, name
if line ? {
name := tab(find("->")) &
move(2) &
file := tab(0) &
out := if *file = 0 then &output else {
open(file,"w") | {
write(&errout,"*** cannot open ",file)
fail
}
}
}
then {
(*name = 0) | (name[1] == "<" & name[-1] == ">") | fail
pwrite(name,out)
if *file ~= 0 then close(out)
return
}
else fail
end
# produce image of list of grammar symbols
#
procedure listimage(a)
local s, x
s := ""
every x := !a do
s ||:= symimage(x)
return s
end
# look for new prompt symbol
#
procedure prompter(line)
if line[1] == "=" then {
prompt := line[2:0]
return
}
end
# write out grammar
#
procedure pwrite(name,ofile)
local nt, a
static builtin
initial builtin := ["lb","rb","vb","nl","","&lcase","&ucase","&digit"]
if *name = 0 then {
a := sort(defs,3)
while nt := get(a) do {
if nt == !builtin then {
get(a)
next
}
write(ofile,"<",nt,">::=",getrhs(get(a)))
}
}
else write(ofile,name,"::=",getrhs(\defs[name[2:-1]])) |
write("*** undefined nonterminal: ",name)
end
# look for file with input
#
procedure source(line)
local file, new
return line ? {
if ="@" then {
new := open(file := tab(0)) | {
write(&errout,"*** cannot open ",file)
fail
}
push(ifile,in) &
in := new
return
}
}
end
# produce string image of grammar symbol
#
procedure symimage(x)
return case type(x) of {
"string": x
"nonterm": "<" || x.name || ">"
"charset": "<'" || x.chars || "'>"
}
end
# process the symbols in an alternative
#
procedure syms(alt)
local slist
static nonbrack
initial nonbrack := ~'<'
slist := []
alt ? while put(slist,tab(many(nonbrack)) |
defnon(2(="<",tab(upto('>')),move(1))))
return slist
end